Image Analysis 2 code
packages <- c("tidyverse","imager","readxl","wesanderson","RColorBrewer")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(imager)
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Attaching package: 'imager'
##
## The following object is masked from 'package:magrittr':
##
## add
##
## The following object is masked from 'package:stringr':
##
## boundary
##
## The following object is masked from 'package:dplyr':
##
## where
##
## The following object is masked from 'package:tidyr':
##
## fill
##
## The following objects are masked from 'package:stats':
##
## convolve, spectrum
##
## The following object is masked from 'package:graphics':
##
## frame
##
## The following object is masked from 'package:base':
##
## save.image
library(readxl)
library(wesanderson)
library(RColorBrewer)
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
df<- read.csv("~/Data Science/R/image-analysis-2-final-AdamWalters1/data/image-analysis-data.csv")
summary(df)
## imageId mountain tree prettySky
## Min. : 1.0 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.: 8.5 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.:0.0000
## Median :16.0 Median :0.0000 Median :1.0000 Median :1.0000
## Mean :16.0 Mean :0.3226 Mean :0.8387 Mean :0.5161
## 3rd Qu.:23.5 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :31.0 Max. :1.0000 Max. :1.0000 Max. :1.0000
## person child animal virginiaTech
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.5484 Mean :0.2258 Mean :0.09677 Mean :0.3548
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## food entertainment city country
## Min. :0 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0 Mean :0.3548 Mean :0.3871 Mean :0.2903
## 3rd Qu.:0 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :0 Max. :1.0000 Max. :1.0000 Max. :1.0000
## building
## Min. :0.0000
## 1st Qu.:0.0000
## Median :1.0000
## Mean :0.7097
## 3rd Qu.:1.0000
## Max. :1.0000
numFeatures <-colSums(df[,-1])
listFeatures <- names(df)[-1]
orderFeatures <- order(numFeatures,decreasing = TRUE)
listFeatures[orderFeatures]
## [1] "tree" "building" "person" "prettySky"
## [5] "city" "virginiaTech" "entertainment" "mountain"
## [9] "country" "child" "animal" "food"
barplot(numFeatures, main = "Feature Popularity", xlab = "Features", ylab = "Count")

barplot(numFeatures, main = "Feature Popularity", xlab = "Count", ylab = "Features", horiz = TRUE, col = "lightblue")

features = data.frame(Feature = listFeatures, Count = numFeatures)
ggplot(features,aes(x=reorder(Feature, Count), y=Count,fill = Feature))+
geom_bar(stat = "identity")+
coord_flip()+
scale_fill_brewer(palette = "Set3")+
theme_minimal()+
theme(legend.position="none")+
xlab("Count")+
ylab("Feature")+
ggtitle("Features Bases on Count")

bb1 <- load.image("images/bb1.jpeg")
plot(bb1)

# Sum the values in each row (excluding the first column)
image_sums <- rowSums(df[, -1])
# Find the index of the image with the highest sum
image_with_most_features <- which.max(image_sums)
# Get the corresponding image ID
image_id_with_most_features <- df$imageId[image_with_most_features]
image_id_with_most_features
## [1] 10
listImages <- df[,1]
orderImages <- order(image_sums, decreasing = TRUE)
orderImages
## [1] 10 11 28 3 5 23 25 29 31 2 6 7 9 14 26 27 1 8 12 13 15 17 18 20 22
## [26] 30 21 4 16 19 24
top_5_images <- orderImages[1:5]
# Get the corresponding image IDs
top_5_image_ids <- df$imageId[top_5_images]
top_5_image_ids
## [1] 10 11 28 3 5
for (image_id in top_5_image_ids) {
bb <- paste0("images/bb", image_id, ".jpeg")
image <- load.image(bb)
plot(image)
}




cbind(image=orderImages,count=image_sums[orderImages])[1:5,]
## image count
## [1,] 10 8
## [2,] 11 7
## [3,] 28 7
## [4,] 3 6
## [5,] 5 6
Image Analysis 3
dat2 <- df[,-1]
dat <- df
km2<-kmeans(dat2,2,nstart=20)
cbind(dat[,-1],km2$cluster)
## mountain tree prettySky person child animal virginiaTech food entertainment
## 1 0 1 0 1 0 0 0 0 0
## 2 1 1 1 0 0 0 0 0 0
## 3 0 1 0 1 0 1 0 0 1
## 4 0 0 0 0 0 0 0 0 0
## 5 0 1 1 1 0 0 1 0 0
## 6 1 1 1 0 0 0 0 0 0
## 7 0 1 1 0 0 0 1 0 0
## 8 1 1 0 0 0 0 0 0 0
## 9 1 1 1 1 0 1 0 0 0
## 10 1 1 0 1 1 0 1 0 1
## 11 0 1 1 1 1 0 1 0 1
## 12 0 1 1 0 0 0 1 0 0
## 13 0 1 1 0 0 0 0 0 1
## 14 0 1 0 1 0 0 1 0 1
## 15 1 1 1 0 0 0 0 0 0
## 16 0 1 0 0 0 0 0 0 0
## 17 0 1 0 1 0 0 0 0 0
## 18 1 1 1 0 0 0 0 0 0
## 19 0 0 0 1 0 0 1 0 0
## 20 0 1 0 1 0 0 0 0 1
## 21 0 1 0 0 0 0 1 0 0
## 22 0 1 1 0 0 0 1 0 0
## 23 0 0 1 0 1 0 1 0 1
## 24 0 0 0 1 0 0 1 0 0
## 25 0 1 0 1 1 0 0 0 1
## 26 1 1 1 1 0 0 0 0 0
## 27 0 1 0 1 1 0 0 0 1
## 28 0 1 1 1 1 0 0 0 1
## 29 0 1 0 1 1 1 0 0 1
## 30 1 0 1 0 0 0 0 0 0
## 31 1 1 1 1 0 0 0 0 0
## city country building km2$cluster
## 1 1 0 1 1
## 2 0 1 1 2
## 3 1 0 1 1
## 4 1 0 1 1
## 5 1 0 1 1
## 6 1 0 1 2
## 7 1 0 1 1
## 8 1 0 1 1
## 9 0 0 0 2
## 10 1 0 1 1
## 11 0 0 1 1
## 12 0 0 1 1
## 13 0 1 0 2
## 14 0 1 0 1
## 15 0 1 0 2
## 16 0 0 1 1
## 17 1 0 1 1
## 18 0 1 0 2
## 19 0 0 0 1
## 20 0 0 1 1
## 21 0 0 1 1
## 22 0 0 1 1
## 23 1 0 1 1
## 24 0 0 0 1
## 25 1 0 1 1
## 26 0 1 0 2
## 27 0 0 1 1
## 28 1 0 1 1
## 29 0 1 0 1
## 30 0 1 1 2
## 31 0 1 1 2
clId1<-dat[km2$cluster==1,1]
clId2<-dat[km2$cluster==2,1]
clImages1<-sapply(clId1,function(id)paste("images/bb",id,".jpeg",sep=""))
clImages2<-sapply(clId2,function(id)paste("images/bb",id,".jpeg",sep=""))
par(mfrow=c(2,3),mar=c(2.1,4.1,1.1,1.1))
plot(load.image(clImages1[1]),main="cl1")
plot(load.image(clImages1[2]),main="cl1")
plot(load.image(clImages1[3]),main="cl1")
plot(load.image(clImages2[1]),main="cl2")
plot(load.image(clImages2[2]),main="cl2")
plot(load.image(clImages2[3]),main="cl2")

elbow <- function(setDat,maxK){
prop<-rep(0,maxK)
for(k in 1:maxK){
km<-kmeans(setDat, centers=k,nstart = 20)
prop[k]<-km$betweenss/km$totss
}
return(prop)
}
elbow_<-elbow(dat2,15)
plot(elbow_)

km5<-kmeans(dat2,5,nstart=20)
clId1<-dat[km5$cluster==1,1]
clId2<-dat[km5$cluster==2,1]
clId3<-dat[km5$cluster==3,1]
clId4<-dat[km5$cluster==4,1]
clId5<-dat[km5$cluster==5,1]
clImages1<-sapply(clId1,function(id)paste("images/bb",id,".jpeg",sep=""))
clImages2<-sapply(clId2,function(id)paste("images/bb",id,".jpeg",sep=""))
clImages3<-sapply(clId3,function(id)paste("images/bb",id,".jpeg",sep=""))
clImages4<-sapply(clId4,function(id)paste("images/bb",id,".jpeg",sep=""))
clImages5<-sapply(clId5,function(id)paste("images/bb",id,".jpeg",sep=""))
par(mfrow=c(2,3),mar=c(2.1,4.1,1.1,1.1))
plot(load.image(clImages1[1]),main="cl1")
plot(load.image(clImages1[2]),main="cl1")
plot(load.image(clImages2[1]),main="cl2")
plot(load.image(clImages2[2]),main="cl2")
plot(load.image(clImages3[1]),main="cl3")
plot(load.image(clImages3[2]),main="cl3")

plot(load.image(clImages4[1]),main="cl4")
plot(load.image(clImages4[2]),main="cl4")
plot(load.image(clImages5[1]),main="cl5")
plot(load.image(clImages5[2]),main="cl5")

Image Analysis 4!!
k<-5
cols <- rainbow(k)
temp<-as.data.frame(km5$center)
names(temp)<-paste("V",1:dim(temp)[[2]],sep="")
parcoord(temp,col=cols,lwd=2,ylim=c(0,2),var.label = FALSE)
legend("topright",horiz = FALSE, legend=as.character(1:k),col=cols,
lty=rep(1,k),lwd=2,cex=.5,bty="n",ncol=2,title="cluster")
legend("topleft",horiz = FALSE, ncol=3,
legend = paste(names(temp),names(dat[,-1])),cex=.5)

summary(km5$centers)
## mountain tree prettySky person
## Min. :0.0000 Min. :0.3333 Min. :0.0000 Min. :0.1667
## 1st Qu.:0.0000 1st Qu.:0.8333 1st Qu.:0.1667 1st Qu.:0.3333
## Median :0.1250 Median :0.8333 Median :0.2500 Median :0.3750
## Mean :0.2667 Mean :0.7750 Mean :0.4500 Mean :0.5750
## 3rd Qu.:0.3333 3rd Qu.:0.8750 3rd Qu.:0.8333 3rd Qu.:1.0000
## Max. :0.8750 Max. :1.0000 Max. :1.0000 Max. :1.0000
## child animal virginiaTech food entertainment
## Min. :0.0000 Min. :0.000 Min. :0.00 Min. :0 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.00 1st Qu.:0 1st Qu.:0.1250
## Median :0.0000 Median :0.000 Median :0.25 Median :0 Median :0.1667
## Mean :0.1833 Mean :0.075 Mean :0.45 Mean :0 Mean :0.3250
## 3rd Qu.:0.1667 3rd Qu.:0.125 3rd Qu.:1.00 3rd Qu.:0 3rd Qu.:0.3333
## Max. :0.7500 Max. :0.250 Max. :1.00 Max. :0 Max. :1.0000
## city country building
## Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.375
## Median :0.5000 Median :0.1250 Median :0.875
## Mean :0.3667 Mean :0.2667 Mean :0.650
## 3rd Qu.:0.5000 3rd Qu.:0.3333 3rd Qu.:1.000
## Max. :0.8333 Max. :0.8750 Max. :1.000
MDS
- -> 7.
#for continuous data
distPrev<-dist(dat[,-1])
mdsPrev<-cmdscale(distPrev)
plot(mdsPrev, col = cols[km5$cluster],xlab="MDS Coord 1",
ylab="MDS coord 2", main="Original Distance Measure")
legend("bottomright",legend=1:6,pch=1,col=cols,cex=.5)

# for binary data (including image id's)
image_ids <- dat[, 1]
distNew<-dist(dat[,-1],method="binary")
mdsNew<-cmdscale(distNew)
plot(mdsNew, col = cols[km5$cluster],xlab="MDS Coord 1",
ylab="MDS coord 2", main="Original Distance Measure")
legend("bottomright",legend=1:6,pch=1,col=cols,cex=.5)
text(mdsNew, labels=image_ids, pos=1, cex=0.7, col=cols[km5$cluster])

means <- tapply(dat$tree, km5$cluster, mean)
# Create a bar graph for mean variable values across different clusters
barplot(means,
main = "Bar Graph for Different Clusters",
xlab = "Cluster",
ylab = "trees")
